Exercises L3

#log-likelihood #likelihood #newton-raphson #max-likelihood-estimation

Francesco Biscaccia Carrara 9/11/2023

Exercise 3.1

For an iid random sample with with and observed sample:

draw the log-likelihood function ;
compute via the Newton-Raphson algorithm with and compare it with the uniroot function;
compute the observed information
compute the upper and lower bounds of ; draw the function and the above set.

Solution 3.1

3.1.1

So, we have only have only one parameter which is .
Then we compute the joint pdf of our sample:

Then, our likelihood function is:
and with our observed sample we have
Then,

#install.packages("latex2exp")
library(latex2exp)
obs_sample = c(5.1, 7.4, 10.9, 21.3, 12.3, 15.4, 25.4, 18.2, 17.4, 22.5)

#We define the log_likelihood function
log_likelihood_gamma = function(alpha) {
  shape = alpha
  rate = 1 
  #log_likelihood = log(prod(dgamma(data, shape = shape, rate = rate)))
  log_likelihood = sum((dgamma(obs_sample, shape = alpha, rate = rate,log = TRUE)))
  return(log_likelihood)
}

alpha_values = seq(0.1, 100, by = 0.1)
y_values = numeric()
for(a in alpha_values){
  y_values = append(y_values,log_likelihood_gamma(a))
}

plot(alpha_values,y_values,type="l", col = "blue", lwd = 2, main = TeX("Log-likelihood function $\\log(L(\\alpha))$"), xlab = TeX("$\\alpha$"), ylab = TeX("$\\log(L(\\alpha))$"))

Pasted image 20231111113111.png

3.1.2

install.packages("numDeriv") 
library(numDeriv)

newton_raphson_step = function(f,alpha){
 l_prime = grad(f,alpha)
 J = -hessian(f,alpha)[1][1]
 return(alpha + l_prime/J)
}

newton_raphson_procedure = function(f,alpha_0){
 curr_alpha = alpha_0
 new_alpha = newton_raphson_step(f,alpha_0)
 while (abs(curr_alpha-new_alpha)>1e-3){
   curr_alpha = new_alpha
   new_alpha = newton_raphson_procedure(f,new_alpha)
 }
 return(new_alpha)
}

newton_raphson_procedure(log_likelihood_gamma,1)

The Newton-Raphson algorithm gives us the best .

#install.packages("Deriv") 
library(Deriv)

log_likelihood_gamma_expression = function(x)  10 * log(1/factorial(x-1)) + (x-1)*log(prod(obs_sample)) -log(sum(obs_sample)) 
uniroot(Deriv(log_likelihood_gamma_expression,"x"),lower = 1, upper = 100,tol = 1e-3)$root

The uniroot function gives us the best .
The two candidates ( ,) are quite similar.

3.1.3

#Observed information
obs_information=-hessian(log_likelihood_gamma,best_alpha)[1][1]

We get a observed information .

3.1.4

I can't understand the task

Exercise 3.2

Suppose

is a realization of the iid random sample , with

draw the log-likelihood function and locate its maximum
from the above observed draw 20 numbers uniformly (see the sample function) and draw the log-likelihood function over (the plot in 1.)
repeat 2. and for each log-likelihood function get its maximum; then draw a histogram of all these maxima.

Solution 3.2

3.2.1

Then we compute the joint pdf of our sample:
Then, our likelihood function is:
and with our observed sample we have
Then,

library(latex2exp)
library("Deriv")

obs_sample = c(7,4,2,4,3,2,5,10,7,7,3,5,5,5,4,3,7,3,6,4)

log_likelihood_poisson <- function(data,params) {
  lambda <- params
  
  log_likelihood <- sum(dpois(data, lambda = lambda, log=TRUE))
  
  return(log_likelihood)
}


lambda_values = seq(0.1, 50, by = 0.1)
y_values = numeric()
for(l in lambda_values){
  y_values = append(y_values,log_likelihood_poisson(obs_sample,l))
}

plot(lambda_values,y_values, type="l", col = "blue", lwd = 2, main = TeX("Log-likelihood function $\\log(L(\\lambda))$"), xlab = TeX("$\\lambda$"), ylab = TeX("$\\log(L(\\lambda))$"))
  
log_likelihood_poisson_expression = function(x) log(1/prod(factorial(obs_sample))) -20*x + sum(obs_sample)*log(x) 
best_lambda = uniroot(Deriv(log_likelihood_poisson_expression,"x"),lower = 1, upper = 100,tol = 1e-3)$root
maximum = log_likelihood_poisson(obs_sample,best_lambda)

Pasted image 20231111145030.png
The maximum of the log-likelihood function is with

3.2.2

new_obs_sample=sample(obs_sample,replace = TRUE)

y_values = numeric()
for(l in lambda_values){
  y_values = append(y_values,log_likelihood_poisson(new_obs_sample,l))
}

lines(lambda_values,y_values,type="l", col = "red", lwd = 2)

Pasted image 20231111144827.png

3.3.3

We repeat this experiment 100 times.

maxima=numeric()
for(i in (1:50)){
  new_obs_sample=sample(obs_sample,replace = TRUE)
  
  new_log_likelihood_poisson <- function(params) {
    lambda <- params
    
    log_likelihood <- sum(dpois(new_obs_sample, lambda = lambda, log=TRUE))
    
    return(log_likelihood)
  }
  
  lambda_values = seq(0.1, 100, by = 0.1)
  new_y_values = numeric()
  for(l in lambda_values){
    new_y_values = append(new_y_values,new_log_likelihood_poisson(l))
  }
  
  log_likelihood_poisson_expression = function(x) log(1/prod(factorial(new_obs_sample))) -20*x + sum(new_obs_sample)*log(x) 
  best_lambda_2 = uniroot(Deriv(log_likelihood_poisson_expression,"x"),lower = 1, upper = 100,tol = 1e-3)$root
  maxima= append(maxima,best_lambda_2)
}

hist(maxima, main=TeX("Histogram of maxima $\\hat{\\lambda}$") ,xlab=TeX("Maxima $\\hat{\\lambda}$"))

Pasted image 20231111145711.png
We can see that if we use the function sample several times on our observations, we get different values for .

Exercise 3.3

Can you draw the contours as in the figure of Example 3, L3 ?
There is also a multivariate version of the Newton-Raphson version we saw in L3. Implement it in R and apply it to the problem in Exercise 3 to compute θ.
Compare your solution by that obtained using optim. (Note: in optim use the option method=‘‘BFGS’’)

Solution 3.3

Partial solution, some errors on plot.

lambda_values = seq(1, 5, by = 0.1)
beta_values = seq(10, 25, by = 0.1)

obs_sample = c(5.1, 7.4, 10.9, 21.3, 12.3, 15.4, 25.4, 18.2, 17.4, 22.5)

likelihood_weibull <- function(lambda,beta) {
  return(prod(dweibull(obs_sample, scale = beta, shape = lambda , log=FALSE)))
}

log_likelihood_weibull <- function(lambda,beta) {
  return(sum(dweibull(obs_sample, scale = beta, shape = lambda , log=TRUE)))
}

likelihood_values = numeric()
log_values = numeric()
for(l in lambda_values){
  for(b in beta_values){
    likelihood_values = append(likelihood_values,likelihood_weibull(l,b))
    log_values = append(log_values,log_likelihood_weibull(l,b))
  }
}

likelihood_matrix = matrix(likelihood_values, nrow = length(lambda_values), ncol = length(beta_values),byrow= TRUE)
log_matrix = matrix(log_values, nrow = length(lambda_values), ncol = length(beta_values),byrow= TRUE)

contour(lambda_values,beta_values,likelihood_matrix, nlevels = 30)
title(main = "Contours of the likelihood")

contour(lambda_values,beta_values,log_matrix, nlevels = 30)
title(main = "Contours of the likelihood")